rm(list = ls())
library(data.table)
library(tidyr)
library(maps)
library(haven)
library(ggplot2)
library(dplyr)
library(readxl)
library(ggrepel)
library(wordcloud)
library(lme4)
library(lmerTest)
library(reshape2)
library(patchwork)
library(psych)
PREP THE DATASET FOR ANALYSIS WVS 5 & 6 ####################
#read the data (Wave 5)
# Data of Wave 5
WV5_data <- readRDS("/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/WV6_dataset_wave_5_6/F00007944-WV5_Data_R_v20180912.rds")
# Convert WV5_data-object in data.frame
WV5_data_df <- as.data.frame(WV5_data)
# show first five columns
WV5_data_df
#rename the variables
WV5_data <- WV5_data_df %>%
rename(gender = V235, age = V237, country_code = V2, wave = V1, risktaking = V86, children = V56, married = V55, employed = V241, education = V238)
WV5_data
colnames(WV5_data)
[1] "wave" "V1A" "V1B" "country_code" "V2A" "V3" "V4"
[8] "V4_CO" "V5" "V5_CO" "V6" "V6_CO" "V7" "V7_CO"
[15] "V8" "V8_CO" "V9" "V9_CO" "V10" "V11" "V12"
[22] "V13" "V14" "V15" "V16" "V17" "V18" "V19"
[29] "V20" "V21" "V22" "V23" "V24" "V25" "V26"
[36] "V27" "V28" "V29" "V30" "V31" "V32" "V33"
[43] "V34" "V35" "V36" "V37" "V38" "V39" "V40"
[50] "V41" "V42" "V43" "V43_01" "V43_02" "V43_03" "V43_04"
[57] "V43_05" "V43_06" "V43_07" "V43_08" "V43_09" "V43_10" "V43_11"
[64] "V43_12" "V43_13" "V43_14" "V43_15" "V43_16" "V43_17" "V43_18"
[71] "V43_19" "V43_20" "V43_21" "V43_22" "V43_23" "V43_24" "V43_25"
[78] "V43_26" "V43_27" "V43_28" "V43_29" "V43_30" "V44" "V45"
[85] "V46" "V47" "V48" "V49" "V50" "V51" "V52"
[92] "V53" "V54" "married" "children" "V57" "V58" "V59"
[99] "V60" "V61" "V62" "V63" "V64" "V65" "V66"
[106] "V67" "V68" "V69" "V69_HK" "V70" "V70_HK" "V71"
[113] "V72" "V73" "V73_HK" "V74" "V74_HK" "V75" "V76"
[120] "V77" "V78" "V79" "V80" "V81" "V82" "V83"
[127] "V84" "V85" "risktaking" "V87" "V88" "V89" "V90"
[134] "V91" "V92" "V93" "V94" "V95" "V96" "V97"
[141] "V98" "V99" "V100" "V101" "V102" "V103" "V104"
[148] "V105" "V106" "V107" "V108" "V109" "V110" "V111"
[155] "V112" "V113" "V114" "V115" "V116" "V117" "V118"
[162] "V119" "V120" "V121" "V122" "V123" "V124" "V125"
[169] "V126" "V127" "V128" "V129" "V130" "V130_CA_1" "V130_IQ_1"
[176] "V130_IQ_2" "V130_IQ_3" "V130_IQ_4" "V130_NZ_1" "V130_NZ_2" "V131" "V132"
[183] "V133" "V134" "V135" "V136" "V137" "V138" "V139"
[190] "V140" "V141" "V142" "V143" "V144" "V145" "V146_00"
[197] "V146_01" "V146_02" "V146_03" "V146_04" "V146_05" "V146_06" "V146_07"
[204] "V146_08" "V146_09" "V146_10" "V146_11" "V146_12" "V146_13" "V146_14"
[211] "V146_15" "V146_16" "V146_17" "V146_18" "V146_19" "V146_20" "V146_21"
[218] "V146_22" "V147" "V148" "V149" "V150" "V151" "V151_IQ_A"
[225] "V151_IQ_B" "V152" "V153" "V154" "V155" "V156" "V157"
[232] "V158" "V159" "V160" "V161" "V162" "V163" "V164"
[239] "V165" "V166" "V167" "V168" "V169" "V170" "V171"
[246] "V172" "V173" "V174" "V175" "V176" "V177" "V178"
[253] "V179" "V180" "V181" "V182" "V183" "V184" "V185"
[260] "V186" "V187" "V188" "V189" "V190" "V191" "V192"
[267] "V193" "V194" "V195" "V196" "V197" "V198" "V199"
[274] "V200" "V201" "V202" "V203" "V204" "V205" "V206"
[281] "V207" "V208" "V209" "V210" "V211" "V212" "V213A"
[288] "V213B" "V213C" "V213D" "V213E" "V213F" "V213G" "V213H"
[295] "V213K" "V213L" "V213M" "V213N" "V214" "V215" "V216"
[302] "V217" "V218" "V219" "V220" "V221" "V222" "V223"
[309] "V224" "V225" "V226" "V227" "V228" "V229" "V230"
[316] "V231" "V232" "V233" "V233A" "V234" "gender" "V236"
[323] "age" "education" "V238CS" "V239" "V240" "employed" "V242"
[330] "V242A_CO" "V243" "V244" "V245" "V246" "V247" "V248"
[337] "V249" "V250" "V251" "V252" "V252B" "V253" "V253CS"
[344] "V254" "V255" "V255CS" "V256" "V257" "V257B" "V257C"
[351] "V258" "V259" "V259A" "V260" "V261" "V262" "V263"
[358] "V264" "V265" "S024" "S025" "Y001" "Y002" "Y003"
[365] "SACSECVAL" "SECVALWGT" "RESEMAVAL" "WEIGHTB" "I_AUTHORITY" "I_NATIONALISM" "I_DEVOUT"
[372] "DEFIANCE" "WEIGHT1A" "I_RELIGIMP" "I_RELIGBEL" "I_RELIGPRAC" "DISBELIEF" "WEIGHT2A"
[379] "I_NORM1" "I_NORM2" "I_NORM3" "RELATIVISM" "WEIGHT3A" "I_TRUSTARMY" "I_TRUSTPOLICE"
[386] "I_TRUSTCOURTS" "SCEPTICISM" "WEIGHT4A" "I_INDEP" "I_IMAGIN" "I_NONOBED" "AUTONOMY"
[393] "WEIGHT1B" "I_WOMJOB" "I_WOMPOL" "I_WOMEDU" "EQUALITY" "WEIGHT2B" "I_HOMOLIB"
[400] "I_ABORTLIB" "I_DIVORLIB" "CHOICE" "WEIGHT3B" "I_VOICE1" "I_VOICE2" "I_VOI2_00"
[407] "VOICE" "WEIGHT4B" "S001" "S007" "S018" "S019" "S021"
[414] "COW"
#select only the variables of interest
WV5_data <- WV5_data %>%
dplyr::select(gender, age, country_code, wave, risktaking, children, employed, education, married)
WV5_data
countrynames <- read.csv("/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/WV6_dataset_wave_5_6/countrynames.txt", header = FALSE, as.is = TRUE)
colnames(countrynames) <- c("code", "name")
# Assuming WV5_data has a column named country_code
WV5_data$country <- countrynames$name[match(WV5_data$country_code, countrynames$code)]
# Check the frequency of each country in the new column
table(WV5_data$country)
Andorra Argentina Australia Brazil Bulgaria
1003 1002 1421 1500 1001
Burkina Faso Canada Chile China Colombia
1534 2164 1000 1991 3025
Cyprus (G) Egypt Ethiopia Finland France
1050 3051 1500 1014 1001
Georgia Germany Ghana Great Britain Guatemala
1500 2064 1534 1041 1000
Hong Kong Hungary India Indonesia Iran
1252 1007 2001 2015 2667
Iraq Italy Japan Jordan Malaysia
2701 1012 1096 1200 1201
Mali Mexico Moldova Morocco Netherlands
1534 1560 1046 1200 1050
New Zealand Norway Peru Poland Romania
954 1025 1500 1000 1776
Russia Rwanda Serbia and Montenegro Slovenia South Africa
2033 1507 1220 1037 2988
South Korea Spain Sweden Switzerland Taiwan
1200 1200 1003 1241 1227
Thailand Trinidad and Tobago Turkey Ukraine United States
1534 1002 1346 1000 1249
Uruguay Viet Nam Zambia
1000 1495 1500
# Display the updated WV5_data
print(WV5_data)
#Read Dataset (Wave 6)
load("/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/WV6_dataset_wave_5_6/WV6_Data_R_v20201117.rdata")
WV6_data <- WV6_Data_R_v20201117
print(WV6_data)
#rename variables
WV6_data <- WV6_data %>%
rename(wave = V1, gender = V240, age = V242,country_code = V2, risktaking = V76, children = V58, married = V57, employed = V229, education = V248)
#select only the variables of interest
WV6_data <- WV6_data %>%
dplyr::select(gender, age, country_code, wave, risktaking, children, employed, education, married)
WV6_data
#decode daraset (Wave 6)
countrynames = read.csv("/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/WV6_dataset_wave_5_6/countrynames.txt", header=FALSE,as.is=TRUE)
colnames(countrynames) = c("code", "name")
WV6_data$country = countrynames$name [match(WV6_data$country_code, countrynames$code)]
table(WV6_data$country)
Algeria Argentina Armenia Australia Azerbaijan Belarus
1200 1030 1100 1477 1002 1535
Brazil Chile China Colombia Cyprus (G) Ecuador
1486 1000 2300 1512 1000 1202
Egypt Estonia Georgia Germany Ghana Haiti
1523 1533 1202 2046 1552 1996
Hong Kong India Iraq Japan Jordan Kazakhstan
1000 4078 1200 2443 1200 1500
Kuwait Kyrgyzstan Lebanon Libya Malaysia Mexico
1303 1500 1200 2131 1300 2000
Morocco Netherlands New Zealand Nigeria Pakistan Palestine
1200 1902 841 1759 1200 1000
Peru Philippines Poland Qatar Romania Russia
1210 1200 966 1060 1503 2500
Rwanda Singapore Slovenia South Africa South Korea Spain
1527 1972 1069 3531 1200 1189
Sweden Taiwan Thailand Trinidad and Tobago Tunisia Turkey
1206 1238 1200 999 1205 1605
Ukraine United States Uruguay Uzbekistan Yemen Zimbabwe
1500 2232 1000 1500 1000 1500
WV6_data
#combine the 2 dataset (Wave 6 + Wave 5)
WVS_data = rbind(WV5_data, WV6_data)
WVS_data
#exclusion of participants and omission of missing data (na)
WVS_data = subset(WVS_data, risktaking > 0 & gender > 0 & age >0 & education > 0 & employed > 0 & married > 0 & children >= 0)
### WVS_data <- na.omit(WVS_data) ### excluded because it is not in code from Mata et al., 2016
# Use the mutate function to change the country name
WVS_data <- WVS_data %>%
mutate(country = ifelse(country == "Great Britain", "United Kingdom", country))
head(WVS_data)
length(unique(WVS_data$country))
[1] 77
nrow(WVS_data) # number of individuals
[1] 149626
range(WVS_data$age, na.rm=TRUE)
[1] 15 99
table(WVS_data$gender) # sex table(data$sex)/nrow(data)
1 2
71689 77937
WVS_data$agecat[WVS_data$age<20]="15-19"
WVS_data$agecat[WVS_data$age>=20 & WVS_data$age <30] = "20-29"
WVS_data$agecat[WVS_data$age>=30 & WVS_data$age <40] = "30-39"
WVS_data$agecat[WVS_data$age>=40 & WVS_data$age <50] = "40-49"
WVS_data$agecat[WVS_data$age>=50 & WVS_data$age <60] = "50-59"
WVS_data$agecat[WVS_data$age>=60 & WVS_data$age <70] = "60-69"
WVS_data$agecat[WVS_data$age>=70 & WVS_data$age <80] = "70-79"
WVS_data$agecat[WVS_data$age>=80] = "80+"
table(WVS_data$age)
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
56 377 530 3386 3309 3734 3373 3605 3766 3639 3858 3532 3584 3500 3252 3969 3059 3302 2959 2917 3585 3073 2883 3054 2737 3600
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
2582 3114 2759 2597 3130 2570 2490 2453 2384 2990 2227 2407 2111 2140 2408 2069 2004 1909 1597 2202 1579 1764 1607 1416 1722 1352
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
1155 1239 995 1380 907 1002 896 761 782 687 590 485 395 429 337 283 264 231 212 97 71 48 49 39 16 12
93 94 95 97 98 99
14 12 6 4 4 2
table(WVS_data$agecat)
15-19 20-29 30-39 40-49 50-59 60-69 70-79 80+
7658 35843 31538 27679 21862 15031 7885 2130
# Neue Spalte 'education_cat' erstellen und initialisieren
WVS_data$education_cat <- NA
# Kategorien zuweisen basierend auf den Bildungsstufen
WVS_data$education_cat <- ifelse(WVS_data$education %in% c(1, 2), "incomplete or no primary education",
ifelse(WVS_data$education %in% c(3, 4, 5, 6), "No Uni",
ifelse(WVS_data$education %in% c(7, 8, 9), "Uni", NA)))
# Tabelle der neuen Kategorien anzeigen
table(WVS_data$education_cat)
incomplete or no primary education No Uni Uni
19354 70033 60239
WVS_data$gender = ifelse(WVS_data$gender == 1, 0, 1) # sex: male vs. female
WVS_data$children = ifelse(WVS_data$children == 0, 0, 1) # children: no vs. yes
WVS_data$married = ifelse(WVS_data$married == 1, 1, 0) # married: yes vs. no
WVS_data$employed = ifelse(WVS_data$employed < 4, 1, 0) # employed: yes vs. no
WVS_data$education = ifelse(WVS_data$education < 4, 0, 1) # education: no primary vs. primary+
head(WVS_data)
length(unique(WVS_data$country))
[1] 77
nrow(WVS_data)
[1] 149626
range(WVS_data$age)
[1] 15 99
table(WVS_data$gender)
0 1
71689 77937
table(WVS_data$children)
0 1
44434 105192
table(WVS_data$married)
0 1
66354 83272
table(WVS_data$employed)
0 1
69519 80107
table(WVS_data$education)
0 1
38011 111615
mean(WVS_data$age)
[1] 41.59569
mean(WVS_data$risktaking)
[1] 3.801345
PREP THE DATASET FOR ANALYSIS HARDSHIP ####################
csv_path <- "/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/Data_Mata_et_al._2016/countryfacts_selection_new.csv"
countryfacts <- read.csv(csv_path, as.is = TRUE, header = TRUE)
labels=c("code","country","codeWVS","Homicide","GDP","InfMort","LifeExp","GINI","GenderPEdu")
names(countryfacts)=labels
missings=colSums(is.na(countryfacts[,4:9]))/77 # proportion of missing values for each of the hardship indicators
unique(WVS_data$country_code) %in% countryfacts$codeWVS # check that all countries in the subset of the WVS data are included in the countryfacts file
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[26] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[51] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[76] TRUE TRUE
countryfacts
mean(countryfacts$Homicide, na.rm = TRUE)
[1] 6.779793
mean(countryfacts$GDP, na.rm = TRUE)
[1] 20123.38
mean(countryfacts$LifeExp, na.rm = TRUE)
[1] 73.25052
# Plot histogram of all hardship indicators
combined_plot <- NULL # Leeres Plot-Objekt erstellen
# Define the vector of labels for the items
items <- c("Homicide","GDP","InfMort","LifeExp","GINI","GenderPEdu")
# Loop durch jedes Item und füge das Histogramm zum kombinierten Plot hinzu
for (item in items) {
# Erstelle ein Histogramm für das aktuelle Item
plot <- ggplot(countryfacts, aes_string(x = item)) +
geom_histogram(binwidth = 1, fill = "skyblue", color = "black") +
labs(title = paste(item),
x = item,
y = "Frequency") +
theme_minimal()
# Füge das Histogramm zum kombinierten Plot hinzu
if (is.null(combined_plot)) {
combined_plot <- plot
} else {
combined_plot <- combined_plot + plot
}
}
# Zeige den kombinierten Plot an
combined_plot
countryfacts$Homicide=log(countryfacts$Homicide)
countryfacts$GDP=log(countryfacts$GDP)
countryfacts$InfMort=log(countryfacts$InfMort)
countryfacts$LifeExp=log(countryfacts$LifeExp)
#countryfacts$GINI=log(countryfacts$GINI) # not transformed
countryfacts$GenderPEdu=log(countryfacts$GenderPEdu)
countryfacts
mean(countryfacts$Homicide, na.rm = TRUE)
[1] 1.31584
mean(countryfacts$GDP, na.rm = TRUE)
[1] 9.413843
mean(countryfacts$LifeExp, na.rm = TRUE)
[1] 4.285506
# Reverse Codierung
countryfacts$Homicide=scale(countryfacts$Homicide)
countryfacts$GDP=scale(-countryfacts$GDP)
countryfacts$InfMort=scale(countryfacts$InfMort)
countryfacts$LifeExp=scale(-countryfacts$LifeExp)
countryfacts$GINI=scale(countryfacts$GINI)
countryfacts$GenderPEdu=scale(-countryfacts$GenderPEdu)
countryfacts
# IMPUTE hardship indicators w/ median
for (counter in 4:9)
{
countryfacts[is.na(countryfacts[,counter]),counter]=median(countryfacts[,counter],na.rm=TRUE)
}
countryfacts
countryfacts$hardship <- rowMeans(countryfacts[, c("Homicide", "GDP", "GINI", "LifeExp", "InfMort", "GenderPEdu")], na.rm = TRUE)
countryfacts
mean(countryfacts$Homicide, na.rm = TRUE)
[1] 0.005702554
mean(countryfacts$GDP, na.rm = TRUE)
[1] -0.001846521
mean(countryfacts$LifeExp, na.rm = TRUE)
[1] -0.003187438
# Plot histogram of all hardship indicators after log transform
# Leeres Plot-Objekt erstellen
combined_plot <- NULL
# Define the vector of labels for the items
items <- c("Homicide", "GDP", "GINI", "LifeExp", "InfMort", "GenderPEdu", "hardship")
# Loop durch jedes Item und füge das Histogramm zum kombinierten Plot hinzu
for (item in items) {
# Erstelle ein Histogramm für das aktuelle Item
plot <- ggplot(countryfacts, aes_string(x = item)) +
geom_histogram(binwidth = 1, fill = "skyblue", color = "black") +
labs(title = paste(item),
x = item,
y = "Frequency") +
theme_minimal()
# Füge das Histogramm zum kombinierten Plot hinzu
if (is.null(combined_plot)) {
combined_plot <- plot
} else {
combined_plot <- combined_plot + plot
}
}
# Zeige den kombinierten Plot an
combined_plot
panel.cor = function(x, y, digits = 2, ...)
{
usr = par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
# correlation coefficient
r = cor(x, y,use="complete.obs")
txt = format(c(r, 0.123456789), digits = digits)[1]
txt = paste("r= ", txt, sep = "")
text(0.5, 0.6, txt)
# p-value calculation
p = cor.test(x, y,use="complete.obs")$p.value
txt2 = format(c(p, 0.123456789), digits = digits)[1]
txt2 = paste("p= ", txt2, sep = "")
if(p<0.01) txt2 = paste("p ", "<0.01", sep = "")
text(0.5, 0.4, txt2)
}
pairs(countryfacts[,4:10], upper.panel = panel.cor,las=1,cex.labels=.9)
dev.print(postscript,"scatter_indicators.eps",width=8, height=8,horizontal=FALSE,onefile=FALSE)
quartz_off_screen
2
library(psych)
# Subset der Hardship-Indikatoren aus dem countryfacts-Datensatz auswählen
hardship_subset <- countryfacts[, c("Homicide", "GDP", "InfMort", "LifeExp", "GINI", "GenderPEdu")]
# Cronbach's Alpha berechnen
alpha_result <- alpha(hardship_subset)
Number of categories should be increased in order to count frequencies.
alpha_result
Reliability analysis
Call: alpha(x = hardship_subset)
95% confidence boundaries
Reliability if an item is dropped:
Item statistics
# Ersetzen Sie "USA" durch "United States" im countryfacts-Datensatz
countryfacts$country[countryfacts$country == "USA"] <- "United States"
# Zusammenführen der 'hardship'-Variable von countryfacts mit WVS_data basierend auf dem Ländernamen
WVS_data <- merge(WVS_data, countryfacts[, c("country", "hardship")], by = "country", all.x = TRUE)
# Kontrolle des Zusammengeführten Datensatzes
head(WVS_data)
#Transformation of item risktaking
WVS_data$risktaking = 6 - WVS_data$risktaking + 1
# Define intervals for risktaking
interval <- cut(WVS_data$risktaking, breaks = c(-Inf, 1, 3, 5, Inf), labels = c("Very Low", "Low", "Medium", "High"), include.lowest = TRUE)
# Add the ordinal variable "Risktaking_ordinal" to the data frame
WVS_data$Risktaking_ordinal <- as.factor(interval)
# Display the updated data matrix
print(WVS_data)
WVS_data$T_score_risktaking = 10*scale(WVS_data$risktaking, center=TRUE,scale=TRUE)+50
#Transform risk variable into Z score
# Assuming T-scores have a mean of 50 and a standard deviation of 10
#WVS_data$Z_score_risktaking = (WVS_data$T_score_risktaking - 50) / 10
# Print the resulting data frame
#print(WVS_data)
#WVS_data <- WVS_data %>%
# group_by(country) %>%
# mutate(z_score_age = scale(age))
WVS_data
Check Laura on Items #########
length(unique(WVS_data$country))
[1] 77
nrow(WVS_data)
[1] 149626
range(WVS_data$age, na.rm = TRUE)
[1] 15 99
table(WVS_data$gender)
0 1
71689 77937
table(WVS_data$children)
0 1
44434 105192
table(WVS_data$married)
0 1
66354 83272
table(WVS_data$employed)
0 1
69519 80107
table(WVS_data$education)
0 1
38011 111615
mean(WVS_data$T_score_risktaking)
[1] 50
mean(WVS_data$age)
[1] 41.59569
MIXED-MODELS WVS-DATA ####################
model0 = lmer(risktaking ~ 1 + (1|country),data = WVS_data)
summary_model0=summary(model0)
model1 <- lmer(risktaking ~ 1 + scale(age) + factor(gender) + (1 + scale(age) + factor(gender) | country),
data = WVS_data,
control = lmerControl(optimizer = "bobyqa"))
summary_model1=summary(model1)
print(summary_model1) # Koeffizientenübersicht des Modells anzeigen
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: risktaking ~ 1 + scale(age) + factor(gender) + (1 + scale(age) + factor(gender) | country)
Data: WVS_data
Control: lmerControl(optimizer = "bobyqa")
REML criterion at convergence: 539930.7
Scaled residuals:
Min 1Q Median 3Q Max
-2.4750 -0.7820 -0.0772 0.7523 3.2237
Random effects:
Groups Name Variance Std.Dev. Corr
country (Intercept) 0.16689 0.4085
scale(age) 0.01956 0.1399 0.31
factor(gender)1 0.02207 0.1486 0.12 0.25
Residual 2.15081 1.4666
Number of obs: 149626, groups: country, 77
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 3.42058 0.04697 76.22138 72.83 <2e-16 ***
scale(age) -0.32274 0.01658 74.09940 -19.46 <2e-16 ***
factor(gender)1 -0.36831 0.01887 73.38879 -19.52 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) scl(g)
scale(age) 0.296
fctr(gndr)1 0.067 0.222
summary_model1 <- summary(model1)
# Gewünschte Werte extrahieren und formatieren
results_model1 <- data.frame(
Predictor = c("Intercept", "Age", "Gender"),
Estimate = c(summary_model1$coefficients["(Intercept)", "Estimate"],
summary_model1$coefficients["scale(age)", "Estimate"],
summary_model1$coefficients["factor(gender)1", "Estimate"]),
SE = c(summary_model1$coefficients["(Intercept)", "Std. Error"],
summary_model1$coefficients["scale(age)", "Std. Error"],
summary_model1$coefficients["factor(gender)1", "Std. Error"]),
T_score = c(summary_model1$coefficients["(Intercept)", "t value"],
summary_model1$coefficients["scale(age)", "t value"],
summary_model1$coefficients["factor(gender)1", "t value"]),
p_value = c(summary_model1$coefficients["(Intercept)", "Pr(>|t|)"],
summary_model1$coefficients["scale(age)", "Pr(>|t|)"],
summary_model1$coefficients["factor(gender)1", "Pr(>|t|)"])
)
# p-Values
results_model1$p_value <- ifelse(results_model1$p_value < 0.001, "< .001", sprintf("%.3f", results_model1$p_value))
print(results_model1)
model2 = lmer(risktaking ~ 1+scale(age)+factor(gender) + factor(children) + factor(married) + factor(employed) + factor(education)+ (1+scale(age)+factor(gender)+ factor(children) + factor(married) + factor(employed) + factor(education)|country),data = WVS_data,control=lmerControl(optCtrl=list(maxfun=30000),optimizer="bobyqa"))
summary_model2=summary(model2)
print(summary_model2)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: risktaking ~ 1 + scale(age) + factor(gender) + factor(children) +
factor(married) + factor(employed) + factor(education) +
(1 + scale(age) + factor(gender) + factor(children) + factor(married) +
factor(employed) + factor(education) | country)
Data: WVS_data
Control: lmerControl(optCtrl = list(maxfun = 30000), optimizer = "bobyqa")
REML criterion at convergence: 538463.9
Scaled residuals:
Min 1Q Median 3Q Max
-2.56649 -0.78106 -0.08451 0.74253 3.15001
Random effects:
Groups Name Variance Std.Dev. Corr
country (Intercept) 0.139152 0.37303
scale(age) 0.013141 0.11464 0.23
factor(gender)1 0.023395 0.15295 0.00 0.25
factor(children)1 0.021111 0.14530 0.09 0.17 0.08
factor(married)1 0.010402 0.10199 0.18 0.45 0.54 0.25
factor(employed)1 0.007348 0.08572 0.01 0.06 0.03 -0.29 -0.23
factor(education)1 0.015081 0.12281 -0.17 0.07 0.10 -0.15 0.08 0.19
Residual 2.125987 1.45808
Number of obs: 149626, groups: country, 77
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 3.52589 0.04496 77.42844 78.415 < 2e-16 ***
scale(age) -0.23515 0.01418 72.33362 -16.584 < 2e-16 ***
factor(gender)1 -0.34507 0.01948 73.82618 -17.718 < 2e-16 ***
factor(children)1 -0.20808 0.02069 72.08694 -10.056 2.30e-15 ***
factor(married)1 -0.13608 0.01562 62.66677 -8.712 2.13e-12 ***
factor(employed)1 0.01736 0.01331 68.39340 1.304 0.197
factor(education)1 0.12489 0.01869 51.44582 6.683 1.66e-08 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) scl(g) fctr(g)1 fctr(c)1 fctr(mr)1 fctr(mp)1
scale(age) 0.202
fctr(gndr)1 -0.048 0.223
fctr(chld)1 0.001 0.029 0.015
fctr(mrrd)1 0.104 0.308 0.386 -0.049
fctr(mply)1 -0.055 0.083 0.088 -0.209 -0.154
fctr(dctn)1 -0.263 0.106 0.074 -0.081 0.036 0.065
summary_model2 <- summary(model2)
# Gewünschte Werte extrahieren und formatieren
results_model2 <- data.frame(
Predictor = c("Intercept", "Age", "Gender", "Parental status", "Marital status", "Occupational status", "Education"),
Estimate = c(summary_model2$coefficients["(Intercept)", "Estimate"],
summary_model2$coefficients["scale(age)", "Estimate"],
summary_model2$coefficients["factor(gender)1", "Estimate"],
summary_model2$coefficients["factor(children)1", "Estimate"],
summary_model2$coefficients["factor(married)1", "Estimate"],
summary_model2$coefficients["factor(employed)1", "Estimate"],
summary_model2$coefficients["factor(education)1", "Estimate"]),
SE = c(summary_model2$coefficients["(Intercept)", "Std. Error"],
summary_model2$coefficients["scale(age)", "Std. Error"],
summary_model2$coefficients["factor(gender)1", "Std. Error"],
summary_model2$coefficients["factor(children)1", "Std. Error"],
summary_model2$coefficients["factor(married)1", "Std. Error"],
summary_model2$coefficients["factor(employed)1", "Std. Error"],
summary_model2$coefficients["factor(education)1", "Std. Error"]),
T_score = c(summary_model2$coefficients["(Intercept)", "t value"],
summary_model2$coefficients["scale(age)", "t value"],
summary_model2$coefficients["factor(gender)1", "t value"],
summary_model2$coefficients["factor(children)1", "t value"],
summary_model2$coefficients["factor(married)1", "t value"],
summary_model2$coefficients["factor(employed)1", "t value"],
summary_model2$coefficients["factor(education)1", "t value"]),
p_value = c(summary_model2$coefficients["(Intercept)", "Pr(>|t|)"],
summary_model2$coefficients["scale(age)", "Pr(>|t|)"],
summary_model2$coefficients["factor(gender)1", "Pr(>|t|)"],
summary_model2$coefficients["factor(children)1", "Pr(>|t|)"],
summary_model2$coefficients["factor(married)1", "Pr(>|t|)"],
summary_model2$coefficients["factor(employed)1", "Pr(>|t|)"],
summary_model2$coefficients["factor(education)1", "Pr(>|t|)"])
)
# p-Values
results_model2$p_value <- ifelse(results_model2$p_value < 0.001, "< .001", sprintf("%.3f", results_model2$p_value))
print(results_model2)
model3 <- lmer(risktaking ~ 1+scale(age)*hardship+factor(gender)*hardship + factor(children) + factor(married) + factor(employed) + factor(education)+ (1+scale(age)+factor(gender)+ factor(children) + factor(married) + factor(employed) + factor(education)|country),data = WVS_data,control=lmerControl(optCtrl=list(maxfun=30000),optimizer="bobyqa"),REML = FALSE)
summary_model3=summary(model3)
print(summary_model3)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: risktaking ~ 1 + scale(age) * hardship + factor(gender) * hardship +
factor(children) + factor(married) + factor(employed) + factor(education) +
(1 + scale(age) + factor(gender) + factor(children) + factor(married) +
factor(employed) + factor(education) | country)
Data: WVS_data
Control: lmerControl(optCtrl = list(maxfun = 30000), optimizer = "bobyqa")
AIC BIC logLik deviance df.resid
506676.3 507060.7 -253299.1 506598.3 140899
Scaled residuals:
Min 1Q Median 3Q Max
-2.5689 -0.7802 -0.0814 0.7397 3.1587
Random effects:
Groups Name Variance Std.Dev. Corr
country (Intercept) 0.143188 0.37840
scale(age) 0.011997 0.10953 0.25
factor(gender)1 0.018302 0.13528 0.04 0.08
factor(children)1 0.019692 0.14033 0.12 0.12 0.04
factor(married)1 0.008165 0.09036 0.38 0.30 0.36 0.24
factor(employed)1 0.007840 0.08855 -0.04 0.15 0.09 -0.27 -0.16
factor(education)1 0.013980 0.11824 -0.17 0.10 -0.05 -0.16 0.11 0.18
Residual 2.117988 1.45533
Number of obs: 140938, groups: country, 71
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 3.52598 0.04742 70.65620 74.351 < 2e-16 ***
scale(age) -0.23350 0.01420 66.90661 -16.447 < 2e-16 ***
hardship -0.02124 0.05643 69.58929 -0.376 0.70776
factor(gender)1 -0.33971 0.01839 64.47497 -18.474 < 2e-16 ***
factor(children)1 -0.20928 0.02095 67.91734 -9.991 5.74e-15 ***
factor(married)1 -0.13235 0.01513 60.47161 -8.749 2.46e-12 ***
factor(employed)1 0.01692 0.01409 63.41965 1.202 0.23399
factor(education)1 0.12794 0.01900 49.53716 6.733 1.64e-08 ***
scale(age):hardship 0.04544 0.01753 66.92025 2.592 0.01172 *
hardship:factor(gender)1 0.06868 0.02241 65.02969 3.064 0.00317 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) scl(g) hrdshp fctr(g)1 fctr(c)1 fctr(mr)1 fctr(mp)1 fctr(d)1 scl():
scale(age) 0.225
hardship -0.034 0.010
fctr(gndr)1 -0.022 0.085 0.007
fctr(chld)1 0.025 -0.011 -0.002 -0.016
fctr(mrrd)1 0.231 0.190 -0.008 0.249 -0.073
fctr(mply)1 -0.086 0.146 0.005 0.131 -0.204 -0.117
fctr(dctn)1 -0.262 0.126 0.011 -0.028 -0.085 0.049 0.057
scl(g):hrds 0.009 -0.014 0.253 0.000 0.005 -0.009 -0.031 -0.001
hrdshp:f()1 0.007 0.007 -0.086 -0.030 -0.003 -0.013 0.019 -0.005 0.023
# Zusammenfassung des Modells anzeigen
summary_model3 <- summary(model3)
# Gewünschte Werte extrahieren und formatieren
results_model3 <- data.frame(
Predictor = c("Intercept", "Age", "Gender", "Parental status", "Marital status", "Occupational status", "Education", "Hardship", "Interaction: Gender * Hardship"),
Estimate = c(summary_model3$coefficients["(Intercept)", "Estimate"],
summary_model3$coefficients["scale(z_score_age)", "Estimate"],
summary_model3$coefficients["factor(gender)1", "Estimate"],
summary_model3$coefficients["factor(children)1", "Estimate"],
summary_model3$coefficients["factor(married)1", "Estimate"],
summary_model3$coefficients["factor(employed)1", "Estimate"],
summary_model3$coefficients["factor(education)1", "Estimate"],
summary_model3$coefficients["hardship", "Estimate"],
summary_model3$coefficients["hardship:factor(gender)1", "Estimate"]),
SE = c(summary_model3$coefficients["(Intercept)", "Std. Error"],
summary_model3$coefficients["scale(z_score_age)", "Std. Error"],
summary_model3$coefficients["factor(gender)1", "Std. Error"],
summary_model3$coefficients["factor(children)1", "Std. Error"],
summary_model3$coefficients["factor(married)1", "Std. Error"],
summary_model3$coefficients["factor(employed)1", "Std. Error"],
summary_model3$coefficients["factor(education)1", "Std. Error"],
summary_model3$coefficients["hardship", "Std. Error"],
summary_model3$coefficients["hardship:factor(gender)1", "Std. Error"]),
T_score = c(summary_model3$coefficients["(Intercept)", "t value"],
summary_model3$coefficients["scale(z_score_age)", "t value"],
summary_model3$coefficients["factor(gender)1", "t value"],
summary_model3$coefficients["factor(children)1", "t value"],
summary_model3$coefficients["factor(married)1", "t value"],
summary_model3$coefficients["factor(employed)1", "t value"],
summary_model3$coefficients["factor(education)1", "t value"],
summary_model3$coefficients["hardship", "t value"],
summary_model3$coefficients["hardship:factor(gender)1", "t value"]),
p_value = c(summary_model3$coefficients["(Intercept)", "Pr(>|t|)"],
summary_model3$coefficients["scale(z_score_age)", "Pr(>|t|)"],
summary_model3$coefficients["factor(gender)1", "Pr(>|t|)"],
summary_model3$coefficients["factor(children)1", "Pr(>|t|)"],
summary_model3$coefficients["factor(married)1", "Pr(>|t|)"],
summary_model3$coefficients["factor(employed)1", "Pr(>|t|)"],
summary_model3$coefficients["factor(education)1", "Pr(>|t|)"],
summary_model3$coefficients["hardship", "Pr(>|t|)"],
summary_model3$coefficients["hardship:factor(gender)1", "Pr(>|t|)"])
)
# Formatierung der p-Werte
results_model3$p_value <- ifelse(results_model3$p_value < 0.001, "< .001", sprintf("%.3f", results_model3$p_value))
# Ergebnisse anzeigen
print(results_model3)
anova(model0,model1)
refitting model(s) with ML (instead of REML)
Data: WVS_data
Models:
model0: risktaking ~ 1 + (1 | country)
model1: risktaking ~ 1 + scale(age) + factor(gender) + (1 + scale(age) + factor(gender) | country)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
model0 3 549268 549298 -274631 549262
model1 10 539934 540033 -269957 539914 9348.1 7 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
anova(model1,model2)
refitting model(s) with ML (instead of REML)
Data: WVS_data
Models:
model1: risktaking ~ 1 + scale(age) + factor(gender) + (1 + scale(age) + factor(gender) | country)
model2: risktaking ~ 1 + scale(age) + factor(gender) + factor(children) + factor(married) + factor(employed) + factor(education) + (1 + scale(age) + factor(gender) + factor(children) + factor(married) + factor(employed) + factor(education) | country)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
model1 10 539934 540033 -269957 539914
model2 36 538493 538850 -269210 538421 1492.9 26 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
anova(model2,model3)
Error in anova.merMod(model2, model3) :
models were not all fitted to the same size of dataset
WVS_data
coefsallmodels=rbind(summary_model1$coefficients,
summary_model2$coefficients,
summary_model3$coefficients[c(1:2,4:8,3,9:10),])
write.csv(coefsallmodels,"coefsallmodels.csv")
# Check the data merging process
merging_process <- "Insert code to check data merging process"
# Display the merging process
print(merging_process)
[1] "Insert code to check data merging process"
# Extrahieren der Koeffizienten-Tabelle für jedes Modell
coefficients_model0 <- summary(model0)$coefficients
coefficients_model1 <- summary(model1)$coefficients
coefficients_model2 <- summary(model2)$coefficients
coefficients_model3 <- summary(model3)$coefficients
# Filtern der erforderlichen Zeilen aus den Koeffizienten
coefficients_model0 <- coefficients_model0[rownames(coefficients_model0) %in% c("(Intercept)", "scale(z_score_age)", "factor(gender)"), ]
coefficients_model1 <- coefficients_model1[rownames(coefficients_model1) %in% c("(Intercept)", "scale(z_score_age)", "factor(gender)"), ]
coefficients_model2 <- coefficients_model2[rownames(coefficients_model2) %in% c("(Intercept)", "scale(z_score_age)", "factor(gender)", "factor(children)", "factor(married)", "factor(employed)", "factor(education)"), ]
coefficients_model3 <- coefficients_model3[rownames(coefficients_model3) %in% c("(Intercept)", "scale(z_score_age)", "factor(gender)", "factor(children)", "factor(married)", "factor(employed)", "factor(education)", "hardship", "scale(z_score_age):hardship", "factor(gender):hardship"), ]
# Zusammenführen der geschätzten Koeffizienten aus allen Modellen
coefs_all_models <- rbind(coefficients_model0, coefficients_model1, coefficients_model2, coefficients_model3)
# Erstellen einer Tabelle aus den Koeffizienten
results_table <- data.frame(
Predictor = rownames(coefs_all_models),
b = coefs_all_models[, "Estimate"],
SE = coefs_all_models[, "Std. Error"],
T_score = coefs_all_models[, "t value"],
p_value = coefs_all_models[, "Pr(>|t|)"]
)
# Drucken der Ergebnistabelle
results_table